# Packages
library(tidyverse)
library(naniar)
library(tibble)
library(visdat)


# Setup ----
# Load the saved workspace; the pipeline below presumably relies on it to
# provide the `data` object (TODO confirm the contents of the .RData file).
load('data/data.RData')

# Shorten journal titles (SO) to compact labels and keep records with
# PY < 2022. Titles not listed here are left unchanged.
#
# The three titles that contain accented letters are matched by pattern, with
# `.+` standing in for the accented characters, so the recode works whether SO
# holds the real characters or a mangled "?" placeholder. An exact comparison
# against a "?"-mangled literal can never match a correctly encoded title.
# The accented label is written with \u escapes so it survives this file being
# re-saved in a different encoding.
data <- data %>%
  mutate(
    SO = case_when(
      SO == "Austral: Brazilian Journal of Strategy and International Relations" ~ "Austral",
      SO == "Brazilian Journal of International Relations" ~ "BJIR",
      SO == "Conjuntura Austral: Journal of the Global South" ~ "Conjuntura Austral",
      str_detect(
        SO,
        "^Estudos Internacionais: Revista de Rela.+es Internacionais da PUC Minas$"
      ) ~ "Estudos Internacionais",
      SO == "Meridiano 47 - Journal of Global Studies" ~ "Meridiano 47",
      str_detect(
        SO,
        "^Mon.+es: Revista de Rela.+es Internacionais da UFGD$"
      ) ~ "Mon\u00e7\u00f5es",
      SO == "Oikos - Revista de Economia Politica Internacional" ~ "Oikos",
      SO == "Revista Brasileira de Estudos de Defesa" ~ "RBED",
      str_detect(
        SO,
        "^Revista Brasileira de Pol.+tica Internacional$"
      ) ~ "RBPI",
      TRUE ~ SO
    )
  ) %>%
  filter(PY < 2022)



# 1. Table 1 - Missingness by variable ----
# Columns audited for missing values. SO is kept in this subset even though
# it is removed again for the overall summary just below.
data_na1 <- select(data, AB, DE, CR, BP_EP, LA, DI, SO)

# Missing count and missing percentage of each audited variable over all
# records, with the percentage rounded to two decimals.
table_1 <- data_na1 %>%
  select(-SO) %>%
  miss_var_summary() %>%
  mutate(pct_miss = round(pct_miss, 2))


# 1.2 - Adding gender
# GND holds several ";"-separated values per record (presumably one per
# author - TODO confirm). Split it into one row per value and turn the literal
# string "NA" into a real missing value so it is counted by the summary.
tot_gnd <- data %>%
  separate_rows(GND, sep = ";") %>%
  mutate(GND = na_if(GND, "NA")) %>%
  select(GND) %>%
  miss_var_summary() %>%
  mutate(SO = "Total")

# Append the gender row to table_1; the SO label is not part of that table.
table_1 <- bind_rows(table_1, select(tot_gnd, -SO))

# 1.3 - Adding affiliation
# Count the records whose affiliation field (OG) is missing and express them
# as a percentage of all records in `data`, using the same variable / n_miss /
# pct_miss layout as table_1. The denominator comes from the data itself
# rather than a hard-coded record count, and a row is produced even when no
# value is missing.
#
# NOTE(review): only a true NA in OG counts as missing here. An earlier
# str_detect(OG, "NA") flag was overwritten by the is.na() step that followed
# it and never influenced the result. If literal "NA" entries inside OG should
# also count as missing, that needs an explicit token match (a bare "NA"
# substring would also hit names that merely contain those letters) - confirm
# against the data before changing the rule.
tot_og <- data %>%
  ungroup() %>%
  summarise(
    variable = "OG",
    n_miss = sum(is.na(OG)),
    pct_miss = round(mean(is.na(OG)) * 100, 2)
  )

# Append the affiliation row and round every percentage in the table.
table_1 <- table_1 %>%
  bind_rows(tot_og) %>%
  mutate(pct_miss = round(pct_miss, 2))


# 2. Table 2 - Missingness by journal ----
# One row per journal (SO) with the missing count and missing percentage of
# each audited variable, spread into n_miss_<var> / pct_miss_<var> columns.
table_2 <- data_na1 %>%
  group_by(SO) %>%
  miss_var_summary() %>%
  mutate(pct_miss = round(pct_miss, 2)) %>%
  pivot_wider(names_from = "variable", values_from = c(pct_miss, n_miss)) %>%
  select(
    SO,
    n_miss_CR, pct_miss_CR,
    n_miss_BP_EP, pct_miss_BP_EP,
    n_miss_DE, pct_miss_DE,
    n_miss_AB, pct_miss_AB,
    n_miss_LA, pct_miss_LA,
    # DI is part of data_na1 but was absent from this list (the CR pair was
    # listed a second time instead), which silently dropped it from the table.
    n_miss_DI, pct_miss_DI
  )



# 2.1 - Adding gender
# Per-journal missingness of GND: split the ";"-separated values into one row
# each, turn the literal string "NA" into a real missing value, summarise
# within each journal (SO), and attach the result to table_2 as
# n_miss_GND / pct_miss_GND.
#
# `tot_gnd` is already computed in section 1.2 and is not needed here: binding
# its "Total" row and filtering it straight back out had no effect on the
# joined columns (the filter additionally discarded rows with a missing SO).
# The join key is stated explicitly instead of being inferred from the shared
# column names.
table_2 <- data %>%
  separate_rows(GND, sep = ";") %>%
  mutate(GND = na_if(GND, "NA")) %>%
  select(GND, SO) %>%
  group_by(SO) %>%
  miss_var_summary() %>%
  mutate(pct_miss = round(pct_miss, 2)) %>%
  rename(
    n_miss_GND = n_miss,
    pct_miss_GND = pct_miss
  ) %>%
  select(-variable) %>%
  left_join(table_2, ., by = "SO")


# 2.2 - Adding affiliation
# Number of records per journal, used as the denominator below.
tot_articles_per_SO <- data %>%
  group_by(SO) %>%
  count() %>%
  rename(tot_articles = n)

# Per-journal missingness of the affiliation field (OG), attached to table_2
# as n_miss_OG / pct_miss_OG.
# - Every journal gets a row, so a journal with no missing OG shows 0 rather
#   than NA after the join.
# - The percentage is scaled to 0-100 before rounding to two decimals, like
#   the other pct_miss_* columns; rounding the proportion first and then
#   multiplying by 100 only ever yields whole percentages.
# - `tot_og` (the overall figure) is computed in section 1.3 and is not
#   recomputed here.
#
# NOTE(review): only a true NA in OG counts as missing. An earlier
# str_detect(OG, "NA") flag was overwritten by the is.na() step that followed
# it and never influenced the result - confirm whether literal "NA" entries
# inside OG should be treated as missing too.
table_2 <- data %>%
  group_by(SO) %>%
  summarise(n_miss_OG = sum(is.na(OG))) %>%
  ungroup() %>%
  left_join(tot_articles_per_SO, by = "SO") %>%
  mutate(pct_miss_OG = round(n_miss_OG / tot_articles * 100, 2)) %>%
  select(SO, n_miss_OG, pct_miss_OG) %>%
  left_join(table_2, ., by = "SO")


# Interactive check: missingness of DI within each journal (SO), with the
# percentage rounded to two decimals, opened in the data viewer.
data_na1 %>%
  select(AB, SO, DI) %>%
  group_by(SO) %>%
  miss_var_summary() %>%
  filter(variable == "DI") %>%
  mutate(pct_miss = round(pct_miss, 2)) %>%
  View()
